# Standard library
import csv
import inspect
import os
import re
from functools import reduce  # was fused as "importreduce" in the broken export

# Third-party
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import RobustScaler

# Local
import composite as ci

# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
import plotly
from IPython.display import display, HTML

plotly.offline.init_notebook_mode()
# Inject the MathJax loader so LaTeX labels render inside VS Code notebooks.
display(
    HTML(
        '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
    )
)
Code
# show all output from cells, not just the last expression
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"  # default is "last_expr"
# show every DataFrame column when printing
pd.set_option("display.max_columns", None)
Code
# Create the output folder if it does not exist yet.
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs("../output", exist_ok=True)
Country names dict
Code
# Load the country-name <-> abbreviation mapping from CSV.
country_to_abbrev = (
    pd.read_csv("../data/countryAbbr.csv", header=None, index_col=0)
    .squeeze()
    .to_dict()
)

# Invert the mapping: abbreviation -> full country name.
abbrev_to_country = {abbr: name for name, abbr in country_to_abbrev.items()}

# Extra abbreviations used by Eurostat that are not in the CSV.
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"
Beach litter originating from national land-based sources that ends in the beach (%)
PERCENT
900
EN_MAR_BEALIT_BV
Beach litter originating from national land-based sources that ends in the beach (Tonnes)
TONNES
900
EN_MAR_BEALIT_EXP
Exported beach litter originating from national land-based sources (Tonnes)
TONNES
900
EN_MAR_BEALIT_OP
Beach litter originating from national land-based sources that ends in the ocean (%)
PERCENT
900
EN_MAR_BEALIT_OV
Beach litter originating from national land-based sources that ends in the ocean (Tonnes)
TONNES
900
EN_MAR_CHLANM
Chlorophyll-a anomaly, remote sensing (%)
PERCENT
2784
EN_MAR_CHLDEV
Chlorophyll-a deviations, remote sensing (%)
PERCENT
4131
EN_MAR_PLASDD
Floating plastic debris density (count per km2)
NUMBER
3
14.2.1
EN_SCP_ECSYBA
Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO)
NUMBER
33
14.3.1
ER_OAW_MNACD
Average marine acidity (pH) measured at agreed suite of representative sampling stations
PH
903
14.4.1
ER_H2O_FWTL
Proportion of fish stocks within biologically sustainable levels (not overexploited) (%)
PERCENT
422
14.5.1
ER_MRN_MPA
Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%)
PERCENT
6049
14.6.1
ER_REG_UNFCIM
Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest)
NUMBER
882
14.7.1
EN_SCP_FSHGDP
Sustainable fisheries as a proportion of GDP
PERCENT
5880
14.a.1
ER_RDE_OSEX
National ocean science expenditure as a share of total research and development funding (%)
PERCENT
159
14.b.1
ER_REG_SSFRAR
Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest)
NUMBER
882
14.c.1
ER_UNCLOS_IMPLE
Score for the implementation of UNCLOS and its two implementing agreements (%)
PERCENT
63
ER_UNCLOS_RATACC
Score for the ratification of and accession to UNCLOS and its two implementing agreements (%)
PERCENT
63
Check official data by indicator
Code
# Check an official SDG indicator; change SeriesCode to the one you want to check.
# Example with Average marine acidity (pH): ER_OAW_MNACD

# Normalise every "United ... Kingdom" variant to a single label.
uk_mask = sdg14["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)")
sdg14.loc[uk_mask, "GeoAreaName"] = "United Kingdom"

# Restrict to the study countries and pivot to a country x year table.
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
sdg14check = sdg14check[sdg14check["SeriesCode"] == "ER_OAW_MNACD"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)

mr = sdg14check.columns[-1]  # most recent year available
sdg14check[[2012, 2016, mr]]
missingCountries(sdg14check)
TimePeriod
2012
2016
2021
GeoAreaName
Belgium
8.161000
8.193125
7.8710
Estonia
NaN
NaN
7.7255
Finland
NaN
NaN
NaN
France
8.078667
8.078500
NaN
Netherlands
NaN
NaN
NaN
Spain
8.004000
7.996000
NaN
Sweden
8.137500
8.171750
NaN
United Kingdom
8.071250
8.071333
NaN
Missing countries:
Germany
Denmark
Ireland
Lithuania
Latvia
Poland
Portugal
14.1
(a) Index of coastal eutrophication
We use Gross Nitrogen Balance: Max-Min transformation where the max-value is the maximum value in the period 2012-2021 and the min-value is zero.
# Marine waters affected by eutrophication (km2 and % EEZ), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/sdg_14_60/default/table?lang=en
eutro = pd.read_csv("../data/sdg_14_60_linear.csv")
eutro["geo"] = eutro["geo"].map(abbrev_to_country).fillna(eutro["geo"])
eutro = eutro[eutro["geo"].isin(countries) & (eutro["unit"] == "PC")]  # KM2 for area
eutro.set_index(["geo", "TIME_PERIOD"], inplace=True)
eutro = eutro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)

# maxMin transformation as (max-x)/(max-min) * 100
# --> converts to 0-100 scale with 0=worst, 100=best
window = eutro.loc[:, "2012":mr]
hi, lo = window.max().max(), window.min().min()
eutro = ((hi - window) / (hi - lo) * 100).round(2)

# UK missing. We will use the average of the other countries each year.
eutro.loc["United Kingdom"] = eutro.mean()

mr = eutro.columns[-1]
eutro[[2012, 2016, mr]]
missingCountries(eutro)
TIME_PERIOD
2012
2016
2019
geo
Belgium
100.000000
100.000000
100.000000
Denmark
100.000000
99.260000
100.000000
Estonia
90.040000
93.360000
87.820000
Finland
90.040000
86.350000
78.970000
France
100.000000
98.890000
97.050000
Germany
100.000000
100.000000
100.000000
Ireland
100.000000
100.000000
100.000000
Latvia
96.680000
95.200000
95.200000
Lithuania
99.260000
92.620000
94.100000
Netherlands
100.000000
100.000000
100.000000
Poland
98.520000
96.310000
92.250000
Portugal
80.810000
89.670000
90.410000
Spain
93.730000
93.360000
78.600000
Sweden
97.420000
91.880000
82.660000
United Kingdom
96.178571
95.492857
92.647143
No missing countries
(b) Floating Plastic Debris Density
We use two indicators:
Plastic Waste kg/ha: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.
Recovery Rate of Plastic Packaging: Without further transformation as score is provided dimensionless.
Average marine acidity (pH) measured at agreed suite of representative sampling stations
We use two indicators:
Greenhouse gas emissions under the Effort Sharing Decision (ESD): Max-Min transformation where the max- and min-values are the maximum and minimum in the assessment period (2012-2021)
Carbon Emissions per capita: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.
Reconstructed estimates of sea surface pH assessed with GLODAPv2.2021
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/
pop = pd.read_csv("../data/tps00001.csv")
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
pop = pop[pop["geo"].isin(countries)]
pop = pop.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
Code
# Data to corroborate the ENV_AIR_GGE data. Includes UK.
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year
# (1 MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/
co2T = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name=1, skiprows=11
)
co2T = co2T.T
co2T.columns = co2T.iloc[0, :]
co2T.columns = co2T.columns.astype(int)
co2T = co2T.rename_axis(index="geo", columns=None)
co2T = co2T[co2T.index.isin(countries)]

mr = co2T.columns[-1]  # most recent year
co2T = co2T * 3.664  # convert from carbon to CO2
co2pc = co2T / pop * 1_000_000  # tonnes CO2 per capita

# maxMin transformation as (max-x)/(max-min) * 100
# --> converts to 0-100 scale with 0=worst, 100=best
co2pc = (
    (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
    / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
    * 100
).round(2)

co2pc = co2pc.ffill(axis=1)  # fill empty values with last available year
co2pc[[2012, 2016, mr]]
missingCountries(co2pc)
2012
2016
2021
geo
Belgium
49.53
53.33
57.88
Denmark
67.81
73.53
85.95
Estonia
13.02
13.75
61.61
Finland
47.54
55.14
70.86
France
82.44
86.35
90.68
Germany
41.78
45.19
59.36
Ireland
57.48
55.63
64.73
Latvia
98.05
98.19
96.65
Lithuania
88.46
89.81
86.82
Netherlands
44.28
45.44
59.73
Poland
55.26
55.61
54.39
Portugal
88.79
87.58
95.57
Spain
78.08
81.07
87.13
Sweden
86.97
91.69
100.00
United Kingdom
63.17
76.84
87.67
No missing countries
14.4
Proportion of fish stocks within biologically sustainable levels
Coverage of protected areas in relation to marine area
We consider two indicators:
Coverage of protected areas in relation to marine areas: Distance to Reference transformation where the max-value is set to 30% and the min-value is 0%
OHI ‘Biodiversity’ Index : No further transformation
1. Marine protected areas (% of territorial waters)
# EEZ area
# Data: https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# https://www.europarl.europa.eu/factsheets/en/sheet/100/outermost-regions-ors-
outermost = [
    # Portugal
    "Azores",
    "Madeira",
    # Spain
    "Canary Islands",
    # we could include outermost regions of France, but we care EEZs in
    # the Atlantic, Baltic and North Sea
]

eez = pd.read_csv(
    "../data/EMODnet_EEZ_v11_20210506.csv",
    delimiter=";",
    encoding="latin-1",
)
eez = eez[eez["Territory"].isin(outermost + countries)]

# Collapse sub-territory labels ("France (Atlantic)", ...) into the parent country.
for country in ["France", "Portugal", "Spain"]:
    eez.loc[eez["Country"].str.contains(country), "Country"] = country

eez = eez.apply(pd.to_numeric, errors="ignore")
eez = eez[["Country", "Area_km2"]].groupby(["Country"]).sum(numeric_only=True)
eez = eez[eez.index.isin(countries)]
eez
Degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing
We use two indicators:
Fishing Subsidies relative to Landings: Max-Min Transformation where the max-value and min-value are the maximum and minimum in the assessment period (2012-2021)
# Selection of subsidies by level of risk to fishery sustainability,
# as per https://stats.oecd.org/Index.aspx?QueryId=121718
highRisk = [
    "I.E.1. Fuel tax concessions",
    "I.A.1. Transfers based on variable input use",
    "I.A.2.1.Support to vessel construction/purchase",
    "I.A.2.2.Support to modernisation",
    "I.A.2.3.Support to other fixed costs",
    "II.A. Access to other countries’ waters",
    "I.B.2. Special insurance system for fishers",
]
moderateRisk = [
    "I.B.1. Income support",
    "I.C. Transfers based on the reduction of productive capacity",
    "II.B.1. Capital expenditures",
    "II.B.2. Subsidized access to infrastructure",
]
uncertainRisk = [
    "I.D. Miscellaneous direct support to individuals and companies",
    "I.E.2. Other tax exemptions",
    "II.D. Support to fishing communities",
    "II.C. Marketing and promotion",
    "II.E. Education and training",
    "II.F. Research and development",
    "II.H. Miscellaneous support for services to the sector",
]
noRisk = [
    "II.G.1. Management expenditures",
    "II.G.2. Stock enhancement programs",
    "II.G.3. Enforcement expenditures",
]
Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE
# selection as per https://stats.oecd.org/Index.aspx?QueryId=121718
fse = pd.read_csv("../data/FISH_FSE.csv")
fse = fse[fse["Country"].isin(countries)]

# Strip variable codes and delete parent variables to avoid double counting.
# Solution from https://stackoverflow.com/q/76183612/14534411
separator = "."  # NOTE(review): appears unused here — confirm before removing
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)
variableAll = fse.vCode.unique().tolist()


def is_parent(p, target):
    """True when code `p` is a strict descendant of `target` (dot hierarchy)."""
    return p.startswith(target) and len(p) > len(target) and p[len(target)] == "."


# Keep only leaf codes: a code is a leaf when the next sorted code is not its child.
vSupport = []
prev = ""
for s in sorted(variableAll) + [""]:
    if prev and not is_parent(s, prev):
        vSupport.append(prev)
    prev = s

fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# We use US dollars because some countries report in their own currency
# ('Danish Krone', 'Zloty', 'Swedish Krona').
fse = fse[fse.Measure == "US dollar"][["Country", "Variable", "Value", "Year", "Unit"]]

fseRisk = fse[fse.Variable.isin(highRisk)]
fseRisk = fseRisk.groupby(["Country", "Year"]).sum(numeric_only=True)
negativeValues(fseRisk)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)

# Fishery-sustainability high-risk subsidies over total subsidies (%).
fseRisk = (fseRisk / fse * 100).pivot_table(
    index="Country", columns="Year", values="Value"
)

# Fill missing values with nearest, then backfill and forward fill.
fseRisk = (
    fseRisk.interpolate(method="nearest", axis=1, limit_direction="both")
    .bfill(axis=1)
    .ffill(axis=1)
)

# Finland is missing; we use the mean per year.
fseRisk.loc["Finland"] = fseRisk.mean()
fseRisk[[2012, 2016, 2020]]
missingCountries(fseRisk)
# IUU-fishing instrument criteria retained for the composite indicator.
selectedIUU = [
    "Accepted FAO Compliance Agreement",
    "Mandatory vessel tracking for commercial seagoing fleet",
    "Operate a national VMS/FMC centre",
    "Designated ports specified for entry by foreign vessels",
    "Ratification/accession of UNCLOS Convention",
    "Ratification/accession of UNFSA",
    "Have a NPOA-IUU ",
    "Compliance with RFMO flag state obligations",
    "Compliance with RFMO port state obligations",
]
# Livelihoods - Quality
# Hours Worked https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
# Select Insight Advisor, search in Asset for the measure and select Dimensions: year, member state
# hoursWorked = pd.read_excel('../data/blueEconomyObservatory/hoursWorked.xlsx')
# hoursWorked.rename(columns={'Member State':'country_name','Year':'year','Hours worked by employees':'hoursWorked'}, inplace=True)
# gvaHoursW = hoursWorked.merge(gvaEcon.reset_index(), on=['country_name','year'], how='left')
# gvaHoursW['gvaHoursW'] = gvaHoursW['value'] / gvaHoursW['hoursWorked'] * 1_000_000
# gvaHoursW = gvaHoursW[['country_name','year','gvaHoursW']].set_index(['country_name','year'])
# gvaHoursW.pivot_table(index='country_name', columns='year', values='gvaHoursW')
# Above, I calculated the GVA per hour worked ratio using the raw data of each variable.
# The result is different from the ratio variable provided by the Blue Economy Observatory. I emailed them to ask for clarification.
gvaHoursW = pd.read_excel(
    "../data/blueEconomyObservatory/GVAperHourWorked.xlsx"
).set_index(["Year", "Member State"])
gvaHoursW.rename(
    columns={"Gross value added per hour worked by employees": "gvaHoursW"},
    inplace=True,
)
gvaHoursW["gvaHoursW"] = gvaHoursW["gvaHoursW"].apply(pd.to_numeric, errors="coerce")

# Comparative price levels https://ec.europa.eu/eurostat/databrowser/view/TEC00120/default/table?lang=en
pppEurostat = pd.read_csv("../data/pppEurostat.csv")
pppEurostat.geo = pppEurostat.geo.replace(abbrev_to_country)
pppEurostat = (
    pppEurostat[pppEurostat.geo.isin(countries)]
    .rename(columns={"geo": "Member State", "TIME_PERIOD": "Year", "OBS_VALUE": "ppp"})[
        ["ppp", "Member State", "Year"]
    ]
    .set_index(["Member State", "Year"])
)

# NOTE(review): the two MultiIndexes have their levels in different order
# (Year/Member State vs Member State/Year) — pandas aligns by level name here; verify.
gvaHoursW = gvaHoursW.merge(pppEurostat, left_index=True, right_index=True)

# PPP-adjust and restrict to the 2012-2022 assessment window.
gvaHoursW["gvaHoursWppp"] = gvaHoursW["gvaHoursW"] / gvaHoursW["ppp"] * 100
gvaHoursW = gvaHoursW[gvaHoursW.index.get_level_values("Year").isin(range(2012, 2023))]

# Max-min score on a 0-100 scale.
gvaHoursW["gvaHoursWscore"] = (
    (gvaHoursW.gvaHoursW - gvaHoursW.gvaHoursW.min())
    / (gvaHoursW.gvaHoursW.max() - gvaHoursW.gvaHoursW.min())
    * 100
).round(2)

# Denmark and France are missing.
# We fill them with the mean of the other countries by year.
print(
    "Missing countries: \n",
    gvaHoursW[gvaHoursW.gvaHoursW.isna()]
    .index.get_level_values("Member State")
    .unique()
    .to_list(),
)
gvaHoursW["gvaHoursWscore"] = gvaHoursW.groupby("Year")["gvaHoursWscore"].transform(
    lambda x: x.fillna(x.mean())
)
Missing countries:
['Denmark', 'France']
Code
# Livelihoods - Quantity
# FTE employees https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
fteEmployment = pd.read_excel("../data/blueEconomyObservatory/employmentFTE.xlsx")
fteEmployment.rename(
    columns={
        "Member State": "country_name",
        "Year": "year",
        "Employees in full time equivalent units": "fteEmployees",
    },
    inplace=True,
)

# Coastal areas https://ec.europa.eu/eurostat/web/coastal-island-outermost-regions/methodology
coastArea = pd.read_excel("../data/NUTS2021Coastal.xlsx", sheet_name="Coastal regions")[
    ["NUTS_ID", "COASTAL CATEGORY"]
]
coastArea.rename(columns={"NUTS_ID": "geo", "COASTAL CATEGORY": "coastal"}, inplace=True)

# Population https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_PJANAGGR3__custom_7060174/default/table?lang=en
population = pd.read_csv("../data/demoNUTS3.csv")
coastalPop = coastArea.merge(population, on="geo", how="left")

# France and UK have three letter abbreviation codes; take the first two letters.
coastalPop["country"] = coastalPop.geo.str.extract(r"([a-zA-Z]{2})").replace(
    abbrev_to_country
)
coastalPop = coastalPop[coastalPop.country.isin(countries)]
coastalPop = coastalPop.dropna(subset=["TIME_PERIOD"])
coastalPop["TIME_PERIOD"] = coastalPop["TIME_PERIOD"].astype(int)

coastalPop = (
    coastalPop.groupby(["country", "TIME_PERIOD", "coastal"])
    .sum(numeric_only=True)
    .reset_index()
)

# Coastal categories: 0 non-coastal, 1 coastal (on coast),
# 2 coastal (>= 50% of population living within 50km of the coastline).
coastalPop = (
    coastalPop[coastalPop["coastal"].isin([1, 2])]
    .groupby(["country", "TIME_PERIOD"])
    .sum()
)

fteCoastalPop = coastalPop.merge(
    fteEmployment,
    left_on=["country", "TIME_PERIOD"],
    right_on=["country_name", "year"],
    how="inner",
)

# FTE blue-economy employees per coastal inhabitant, then max-min score 0-100.
fteCoastalPop["fteCoastalPop"] = (
    fteCoastalPop["fteEmployees"] / fteCoastalPop["OBS_VALUE"]
)
fteCoastalPop["fteCoastalPopScore"] = (
    (fteCoastalPop.fteCoastalPop - fteCoastalPop.fteCoastalPop.min())
    / (fteCoastalPop.fteCoastalPop.max() - fteCoastalPop.fteCoastalPop.min())
    * 100
).round(2)
Proportion of total research budget allocated to research in the field of marine technology
We use two indicators:
Official UNSD indicator ER_RDE_OSEX: Max-Min transformation where the max-value and min-value are the maximum and minimum in the assessment period (2009-2017) of all European countries (not restricted to the countries in the analysis).
SAD/TAC: Catch-weighted TAC relative to Scientific Advice
Note: ER_RDE_OSEX data comes from Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 data is available in the UNstats archive (download csv for 29-Mar-19)
1. Ocean science expenditure ER_RDE_OSEX
Several sources
Code
# %%time
# National ocean science expenditure as a share of total research and development funding (%),
# UNstats archive (years 2009-2012): https://unstats.un.org/sdgs/indicators/database/archive
# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)
oResearchOld = pd.read_csv("../data/archive14a1.csv")
oResearchOld = oResearchOld.pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total R&D funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database
# Read official data and merge with archive data.
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)

# Use all countries in Europe for the normalisation base.
oResearch = oResearch[oResearch.index.isin(allEurope)].dropna(how="all", axis=1)

# Fill nan of year 2013 from new report with old report (and 2016 with 2017 if needed).
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
# oResearch[2016] = oResearch[2016].fillna(oResearch[2017])
oResearch = oResearch[list(range(2013, 2018))]

# Weight by EEZ area.
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )

# maxMin transformation as (x - min)/(max-min) * 100
# --> converts to 0-100 scale with 0=worst, 100=best
oResearch = (
    (oResearch.loc[:, 2013:2017] - oResearch.loc[:, 2013:2017].min().min())
    / (
        oResearch.loc[:, 2013:2017].max().max()
        - oResearch.loc[:, 2013:2017].min().min()
    )
    * 100
).round(2)

# Fill each column's NaNs with that column's mean.
oResearch = oResearch.fillna(oResearch.mean())

oResearch = oResearch[oResearch.index.isin(countries)]
oResearch[[2013, 2016, 2017]]
missingCountries(oResearch)
# Percentage of Fish Species Threatened
# Time series comparison is tricky: https://www.iucnredlist.org/assessment/red-list-index
threatenedFish = pd.read_csv("../data/threatenedFishIUCN.csv")
threatenedFish = threatenedFish.pivot_table(
    index="name", columns="year", values="threatenedScore"
)
threatenedFish
missingCountries(threatenedFish)
year
2012
2016
2022
name
Belgium
77.777778
87.786260
84.967320
Denmark
83.516484
89.944134
86.138614
Estonia
80.952381
89.473684
90.243902
Finland
81.818182
89.189189
90.000000
France
88.135593
93.216080
91.332611
Germany
77.611940
87.022901
82.894737
Ireland
86.592179
93.750000
91.885965
Latvia
77.272727
87.804878
88.636364
Lithuania
77.272727
87.500000
88.372093
Netherlands
83.132530
91.489362
89.047619
Poland
76.923077
87.234043
86.274510
Portugal
89.573460
94.731065
92.578125
Spain
89.938398
94.646465
92.513863
Sweden
77.272727
87.121212
84.027778
United Kingdom
86.729858
93.827160
91.337100
No missing countries
14.c
Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea
We use two indicators:
Participation in agreements of the International Marine Organization (IMO Participation Rate):
Mining Area/EEZ (Deep Sea Minining)
1. Participation in agreements of the International Marine Organization
# Dictionary for country codes used by GISIS.
with open("../data/gisisDict.csv") as csv_file:
    reader = csv.reader(csv_file)
    gisisDict = dict(reader)

# Invert: country name -> GISIS code.
gisisDict = dict(map(reversed, gisisDict.items()))
Code
# Participation in agreements of the International Marine Organization
# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# We get the historical data from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# You need to create account to access data.
# I tried to scrape the data but I am getting errors with Selenium and bs4.
# I downloaded the html manually
gisisCountries = {k: v for k, v in gisisDict.items() if k in countries}
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(code)
    for code in gisisCountries.values()
]
# for i in listIMO:
#     webbrowser.open(i)
Code
# Build a table of IMO-treaty ratification counts per country for 2012/2016/2021,
# parsed from manually-downloaded GISIS HTML pages.
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2016, 2021])

# Loop through the html files in the folder and extract the data.
for i in range(len(os.listdir("../data/treatiesIMO/"))):
    countryIMO = np.nan
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            i
        )
    )[4]
    # Get the country name from the page header (first "Treaty" cell).
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    if countryIMO is not np.nan:
        imoRate.columns = imoRate.iloc[1]
        imoRate = imoRate[2:]
        # New columns with the year of accession and (possibly) denouncement.
        imoRate["accession"] = (
            imoRate["Date of entry into force in country"]
            .str.extract("^([^(]+)")
            .fillna("")
        )
        imoRate["denounced"] = imoRate[
            "Date of entry into force in country"
        ].str.extract(".*\\:(.*)\\).*")
        imoRate[["accession", "denounced"]] = (
            imoRate[["accession", "denounced"]]
            .apply(pd.DatetimeIndex)
            .apply(lambda x: x.dt.year)
        )
        # Count treaties accessioned and not denounced by 2012, 2016 and 2021.
        # BUGFIX: the inner loop variable used to be `i`, shadowing the html
        # file index above — renamed to `yr`.
        for yr in (2012, 2016, 2021):
            imoRate[str(yr)] = np.where(
                (imoRate.accession < yr)
                & ((imoRate.denounced > yr) | (imoRate.denounced.isna())),
                1,
                0,
            )
        imoCount = (
            countryIMO,
            imoRate["2012"].sum(),
            imoRate["2016"].sum(),
            imoRate["2021"].sum(),
        )
        imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount

# Total possible treaties, taken from the last parsed page.
# NOTE(review): assumes every page lists the same treaty set — confirm.
totalIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))

# Apply dif-ref transformation and convert to a percentage.
imoRatedf = imoRatedf.set_index("Country").sort_index()
imoRatedf = 100 - 100 * (totalIMO - imoRatedf).divide(totalIMO)
imoRatedf = imoRatedf.astype(np.float64)
imoRatedf = imoRatedf.apply(pd.to_numeric)
imoRatedf
missingCountries(imoRatedf)
Given our ratio-scale, fully comparable indicators, meaningful aggregation of indicators into a composite indicator is obtained according to social choice theory by applying a generalized mean:
with weights $\alpha_i$ and $0 \le \alpha_i \le 1$. The parameter $\sigma$ is used to quantify the elasticity of substitution between the different indicators. High (low) values of $\sigma$ imply good (poor) substitution possibilities, which means that a high score in one indicator can (cannot) compensate a low score in another indicator. Consequently, high and low values of $\sigma$ correspond to concepts of weak and strong sustainability, respectively. Depending on $\sigma$, one can obtain a full class of specific functional forms for the composite indicator.
We define:
and
Code
# Collect every indicator DataFrame and its name.
varDf = [
    nitro, eutro, wasteG, wasteR, mspVal, ohiBio, co2pc, phScore,
    fmsyF, bBmsy, kbaMPA, natura2kEEZ, fseRisk, tacCatch, compositeIUU,
    livelihoodsAgg, economies, gvaNights, oResearch, sadTac, ohiArt,
    threatenedFish, imoRatedf, seaMining,
]
varNames = [
    "nitro", "eutro", "wasteG", "wasteR", "mspVal", "ohiBio", "co2pc", "phScore",
    "fmsyF", "bBmsy", "kbaMPA", "natura2kEEZ", "fseRisk", "tacCatch", "compositeIUU",
    "livelihoodsAgg", "economies", "gvaNights", "oResearch", "sadTac", "ohiArt",
    "threatenedFish", "imoRatedf", "seaMining",
]
dictIndicators = dict(zip(varNames, varDf))

# Stack each wide (country x year) frame into a long (Country, Year) indexed column.
for name, df in dictIndicators.items():
    stacked = df.stack().to_frame().rename(columns={0: str(name)})
    stacked.index.names = ["Country", "Year"]
    stacked.reset_index(inplace=True)
    stacked.Year = stacked.Year.astype(int)
    stacked.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = stacked

# Merge all variables into one dataframe; forward- and back-fill by country.
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
indicators = indicators.reset_index().sort_values(["Country", "Year"])
indicators = indicators[indicators.Year.isin(list(range(2012, 2022)))]
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda x: x.ffill().bfill()
)
indicators = indicators.set_index(["Country", "Year"])
indicators

# Map SDG-14 targets to their component indicators ("plastic" is an
# intermediate composite consumed by "14.1", so order matters here).
indicatorsL = {
    "plastic": ["wasteG", "wasteR"],
    "14.1": ["nitro", "eutro", "plastic"],
    "14.2": ["mspVal", "ohiBio"],
    "14.3": ["co2pc", "phScore"],  # dropped "scoreESD"
    "14.4": ["fmsyF", "bBmsy"],
    "14.5": ["kbaMPA", "natura2kEEZ"],
    "14.6": ["fseRisk", "tacCatch", "compositeIUU"],
    "14.7": ["livelihoodsAgg", "economies", "gvaNights"],
    "14.a": ["oResearch", "sadTac"],
    "14.b": ["ohiArt", "threatenedFish"],
    "14.c": ["imoRatedf", "seaMining"],
}

# Calculate composite indicators for each target using composite.py.
targets = indicators.copy()
for target, indicator in indicatorsL.items():
    try:
        alpha = 1 / len(indicatorsL[target])
        df = targets[indicator]
        sigma = 10
        targets[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        print("Missing indicator for", target)

# Keep only the per-target composite columns.
targets = targets[[i for i in targets if i.startswith("14")]]
%%time# calculate composite score for each target importing the function from composite.py# monte carlo for strong sustainability and one sigma for weak sustainabilityscoresStrong = ci.compositeMC(df = targets, years=[2012, 2016, 2021], simulations=10_000)scoresWeak = pd.DataFrame(ci.compositeDF(alpha =1/len(targets.columns), df = targets, sigma =10), columns=['scoreWeak'])scoresStrong = scoresStrong[scoresStrong.index.get_level_values('Country').isin(countries)]
CPU times: user 18.9 s, sys: 8.42 ms, total: 18.9 s
Wall time: 18.9 s
# Merge all data and calculate the EEZ-weighted average for indicators and targets.
data_frames = [indicators, targets, scoresStrong, scoresWeak]
fullData = reduce(
    lambda left, right: pd.merge(
        left, right, left_index=True, right_index=True, how="inner"
    ),
    data_frames,
)

eezAverage = (
    pd.DataFrame(fullData.stack().reset_index())
    .merge(eez, left_on="Country", right_on="Country", how="left")
    .rename(columns={0: "value", "level_2": "indicator"})
)
# Weight each value by the country's EEZ area, then aggregate per year/indicator.
eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2
eezAverage = (
    eezAverage.groupby(["Year", "indicator"])
    .sum(numeric_only=True)
    .drop("value", axis=1)
)
eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
Code
# define function to plot boxplots
def plotBox(
    df,
    xaxis_title=str,
    yaxis_title=str,
    xlegend=float,
    ylegend=float,
    maxShift=float,
    minShift=float,
):
    """Box plot per indicator with max/min country annotations and the
    EEZ-weighted average overlaid as black markers.

    NOTE(review): the defaults (`str`, `float`) are the type objects
    themselves — presumably placeholders for required arguments; confirm.
    """
    fig = px.box(df, x="indicator", y="value")

    # Annotate the maximum of each box with the country name(s);
    # more than 4 tied countries collapse to "*" (kept in maxCountriesD).
    annoList = []
    maxCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        maxVal = np.max(df[df["indicator"] == s]["value"])
        countries = df.loc[
            (df["value"] == maxVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            maxCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annoList.append(
            dict(x=x, y=maxVal, text=countries, yshift=maxShift, showarrow=False)
        )
        x += 1

    # Same annotation logic for the minimum of each box.
    minCountriesD = {}
    x = 0
    for s in df.indicator.unique():
        minVal = np.min(df[df["indicator"] == s]["value"])
        countries = df.loc[
            (df["value"] == minVal) & (df["indicator"] == s), "Country"
        ].values
        if len(countries) > 4:
            minCountriesD[s] = countries
            countries = "*"
        countries = ",<br>".join(countries)
        annoList.append(
            dict(x=x, y=minVal, text=countries, yshift=minShift, showarrow=False)
        )
        x += 1

    # Overlay the 2021 EEZ-weighted average for each indicator.
    indicatorAverage = eezAverage.loc[(2021, df.indicator.unique()), :].reset_index()
    for s in indicatorAverage.indicator.unique():
        averageVal = float(
            indicatorAverage[indicatorAverage["indicator"] == s]["averageEEZ"]
        )
        fig.add_scatter(
            x=[s],
            y=[averageVal],
            type="scatter",
            mode="markers",
            legendgroup="EEZ average",
            name="EEZ average",
            marker=dict(color="black", size=6),
        )

    fig.update_layout(annotations=annoList)

    # Hide per-trace legends, then re-add one marker so the legend has
    # a single "EEZ average" entry.
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        type="scatter",
        mode="markers",
        legendgroup="EEZ average",
        name="EEZ average",
        marker=dict(color="black", size=6),
    )

    # Legend position and axis titles.
    fig.update_layout(
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="White",
        ),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        # yaxis_range=[-20,100]
    )
    fig.show()
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)# CO2, KG_HAB, Eurostat# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')# co2 = co2[co2['geo'].isin(countryAbb)]# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')# mr = co2.columns[-1] # most recent year# # co2[[2012, 2016, mr]]/1000
1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)
Several sources (see code)
2. Carbon emissions per capita
Several sources (see code)
Code
# # CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat# # https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/# co2 = pd.read_csv("../data/env_air_gge.csv")# co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])# co2 = co2[# co2["geo"].isin(countries)# & (co2.airpol == "CO2")# & (co2.src_crf == "TOTX4_MEMONIA")# ]# co2 = co2.pivot_table(# columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"# )# mr = co2.columns[-1] # most recent year# co2pc = co2 / pop * 1000 ## tonnes co2 per capita# co2pc.dropna(how="all", inplace=True)# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best# co2pc = (# (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])# / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())# * 100# ).round(2)# co2pc[[2012, 2016, mr]]# missingCountries(co2pc)
Code
# # Greenhouse gas emissions under the Effort Sharing Decision (ESD), Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency# # https://www.eea.europa.eu/data-and-maps/data/esd-4# ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)# ghgESD = ghgESD[ghgESD["Geographic entity"].isin(countries)]# ghgESD = ghgESD.set_index("Geographic entity")# ghgESD = ghgESD.dropna(axis=1, how="all")# negativeValues(ghgESD)# mr = ghgESD.columns[-1] # most recent year# ghgESD = ghgESD * 1_000_000
Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)
Code
# # Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)# scoreESD2020 = 100 - 100 * (# ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)# ).divide(allocationESD[2020], axis=0)# scoreESD2030 = 100 - 100 * (# ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)# ).divide(allocationESD[2030], axis=0)# scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)# scoreESD1[scoreESD1 > 100] = 100# # scoreESD1[[2012, 2018, 2021]]
Code
# # Effort sharing regulation# # Get the targets for 2020 and 2030 in percentage# # Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels# # There are targets for 2020 and for 2030# # https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas# # Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]# limitESR.set_index('Country', inplace=True)# limitESR = limitESR[limitESR.index.isin(countries)]# #UK is not in the dataset, we need to add from the official journal# limitESR# missingCountries(limitESR)
# From 14.6, using Eurostat data for landing values.# Even though the USD-EUR discrepancy does not affect the ratio we calculate,# we get today's exchange rate to convert landing values to USD and have a consistent unit# Solution source: https://stackoverflow.com/a/17250702/14534411# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)# tree = ET.parse(r.raw)# root = tree.getroot()# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}# currency = 'USD'# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)# eurTOusd = float(match.attrib['rate'])# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/# landing = pd.read_csv('../data/fish_ld_main.csv')# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)# landing = landing[landing.Country.isin(countries)]# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')
# # Old MPA data (similar results to Natura2k, even though they use the Protected Planet data)# # Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas# # https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS# mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)# mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")# mpa = mpa.dropna(axis=1, how="all")# mpa = mpa.drop(["Country Code", "Indicator Name", "Indicator Code"], axis=1)# # display all rows# negativeValues(mpa)# mpa = (mpa / 0.3).round(2) # dis-ref with target 30%# mpa[mpa > 100] = 100# mpa.sort_index(inplace=True)# mpa[["2016", "2021"]]# missingCountries(mpa)
Measures under the Marine Strategy Framework Directive (DROPPED)
Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF# But the assessment is much shorter. They refer the reader to a JRC report:# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363# That report assesses all the descriptors, but it cannot be compared to the previous assessment.# Moreover, the source code and data are not available.# Overall, it is hard to make an indicator for the measures taken against pressure indicators by the MS.# Countries report different measures and data is poor.